from IPython.display import Image
# Build an IPython Image object for the local file "water_pic.png".
# NOTE(review): as a bare expression this presumably only renders when it is
# the last statement of a notebook cell — confirm if this runs as a script.
Image(filename="water_pic.png")
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter, MultipleLocator
# Format tick labels as thousands (K)
def thousands_formatter(x, pos):
    """Return a tick value expressed in thousands, e.g. 25000 -> '25K'.

    Args:
        x: Numeric tick value.
        pos: Tick position; unused, but part of the two-argument callback
            signature this function is given to ``FuncFormatter`` with.

    Returns:
        ``x`` divided by 1,000, rendered with no decimals and a 'K' suffix.
    """
    return f'{x / 1e3:.0f}K'
# Format tick labels as millions (M)
def millions_formatter(x, pos):
    """Return a tick value expressed in millions, e.g. 25000000 -> '25M'.

    Args:
        x: Numeric tick value.
        pos: Tick position; unused, but part of the two-argument callback
            signature this function is given to ``FuncFormatter`` with.

    Returns:
        ``x`` divided by 1,000,000, rendered with no decimals and an 'M'
        suffix.
    """
    return f'{x / 1e6:.0f}M'
# Load the per-capita water dataset and pick the countries to chart.
data = pd.read_csv("Per_capita2.csv")
countries_to_plot = ['Australia']

# One figure per country: grey bars for per-capita water on the left axis and
# a red population line on a twin right-hand axis.
for country in countries_to_plot:
    fig, bar_ax = plt.subplots(figsize=(14, 8))

    # Keep only this country's rows from 1965 onwards.
    country_data = data[(data['country'] == country) & (data['year'] >= 1965)]

    # Bar chart (left axis).
    bar_ax.bar(country_data['year'], country_data['per_capita'], color='grey', label='Per Capita')
    bar_ax.set_xlabel('Year')
    # NOTE(review): the ticks are rendered in thousands ("K") while this label
    # says "billion m³" — confirm the unit of the per_capita column.
    bar_ax.set_ylabel('Water Sources Per Capita (in billion m³)')
    bar_ax.set_title(f'Water Availability Over Years - {country}')
    bar_ax.grid(True)
    # Show the left-axis ticks as thousands.
    bar_ax.yaxis.set_major_formatter(FuncFormatter(thousands_formatter))

    # Line chart (right axis, sharing the x-axis with the bars).
    line_ax = bar_ax.twinx()
    line_ax.plot(country_data['year'], country_data['population'], marker='o', color='red',
                 linestyle='-', linewidth=2, markersize=6, label='Population')
    line_ax.set_ylabel('Population (Millions)')
    # Show the right-axis ticks as millions.
    line_ax.yaxis.set_major_formatter(FuncFormatter(millions_formatter))
    # One x tick every 5 years.
    line_ax.xaxis.set_major_locator(MultipleLocator(base=5))

    # The legend lists the population line only; the bars are left out.
    line_handles, line_labels = line_ax.get_legend_handles_labels()
    line_ax.legend(handles=line_handles, labels=line_labels, loc='upper left', bbox_to_anchor=(0.75, 1.0))

    fig.tight_layout()
    plt.show()
import pandas as pd
# Load the per-capita dataset.
data = pd.read_csv("Per_capita2.csv")

# Order rows newest-first so that, within every country, the first row is the
# latest year and the last row is the earliest year. The calculation below
# then no longer depends on how the CSV happens to be ordered.
newest_first = data.sort_values('year', ascending=False, kind='stable')

# Earliest-year value minus latest-year value per country: a positive number
# means per-capita water resources fell over the period. Selecting the column
# before aggregating avoids running the lambda over the grouping column.
per_capita_difference = (
    newest_first.groupby('country')['per_capita']
    .agg(lambda values: values.iloc[-1] - values.iloc[0])
)
# Drop the series name so the printed footer stays "dtype: float64" only.
per_capita_difference.name = None

# Largest positive differences first, i.e. the biggest decreases.
sorted_countries = per_capita_difference.sort_values(ascending=False)
print("Countries with the biggest decrease in per capita water resources:")
print(sorted_countries.head())
Countries with the biggest decrease in per capita water resources: country Congo, Rep. 424206.10940 Iceland 399686.47630 Bhutan 283583.85120 Solomon Islands 271623.84635 Papua New Guinea 239530.79763 dtype: float64
import pandas as pd
# Load the per-capita dataset.
data = pd.read_csv("Per_capita2.csv")

# Order rows newest-first so that, within every country, the first row is the
# latest year and the last row is the earliest year. The calculation below
# then no longer depends on how the CSV happens to be ordered.
newest_first = data.sort_values('year', ascending=False, kind='stable')

# Earliest-year value minus latest-year value per country: a negative number
# means per-capita water resources rose over the period. Selecting the column
# before aggregating avoids running the lambda over the grouping column.
per_capita_difference = (
    newest_first.groupby('country')['per_capita']
    .agg(lambda values: values.iloc[-1] - values.iloc[0])
)

# Most negative differences first, i.e. the biggest increases.
sorted_countries = per_capita_difference.sort_values()
countries_with_biggest_increase = sorted_countries.head(3).index
print("Countries with the biggest increase in per capita water resources:")
print(countries_with_biggest_increase)
Countries with the biggest increase in per capita water resources: Index(['Latvia', 'Georgia', 'Croatia'], dtype='object', name='country')
import pandas as pd
import matplotlib.pyplot as plt
# Read the per-capita dataset.
data = pd.read_csv("Per_capita2.csv")

# For each country: last non-null per_capita value minus the first non-null
# one, both taken in the row order of the file.
per_capita_by_country = data.groupby('country')['per_capita']
per_capita_difference = per_capita_by_country.last() - per_capita_by_country.first()

# Ascending order: most negative difference on the left.
sorted_countries = per_capita_difference.sort_values()

# One red bar per country.
# NOTE(review): every country is drawn, not only the largest decreases the
# title mentions — confirm this is intended.
fig, ax = plt.subplots(figsize=(10, 6))
sorted_countries.plot(kind='bar', color='red', ax=ax)
ax.set_xlabel('Country')
ax.set_ylabel('Per Capita Difference')
ax.set_title('Countries with the Biggest Decrease in Per Capita Water Resources')
plt.xticks(rotation=45, ha='right')
fig.tight_layout()
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
# Load the per-capita dataset.
data = pd.read_csv("Per_capita2.csv")

# Order rows newest-first so that, within every country, the first row is the
# latest year and the last row is the earliest year. The calculation below
# then no longer depends on how the CSV happens to be ordered.
newest_first = data.sort_values('year', ascending=False, kind='stable')

# Earliest-year value minus latest-year value per country: negative means
# per-capita water rose, positive means it fell.
per_capita_difference = (
    newest_first.groupby('country')['per_capita']
    .agg(lambda values: values.iloc[-1] - values.iloc[0])
)

label_prefixes = ['First', 'Second', 'Third']
# Three most negative differences -> biggest increases.
biggest_increase_countries = per_capita_difference.sort_values().head(3).index
# Three most positive differences -> biggest decreases.
biggest_decrease_countries = per_capita_difference.sort_values(ascending=False).head(3).index


# Create a function to format tick labels as thousands (K)
def thousands_formatter(x, pos):
    """Return tick value ``x`` in thousands with a 'K' suffix; ``pos`` is unused."""
    return f'{x / 1e3:.0f}K'


plt.figure(figsize=(10, 6))
top_increase_colors = ['green', 'olive', 'black']
top_decrease_colors = ['red', 'orange', 'yellow']

# Each entry: the ranked countries, their colours, and the legend wording.
ranked_groups = [
    (biggest_increase_countries, top_increase_colors, 'Increase'),
    (biggest_decrease_countries, top_decrease_colors, 'Decrease'),
]
for countries, colors, direction in ranked_groups:
    for prefix, color, country in zip(label_prefixes, colors, countries):
        # Scatter the full per-capita history of this country.
        country_data = data[data['country'] == country]
        plt.scatter(country_data['year'], country_data['per_capita'], color=color,
                    label=f'{prefix} Biggest {direction}: {country}')

plt.xlabel('Year')
plt.ylabel('Water Sources Per Capita (in billion m³)')
plt.title('Water Availability Over Years - Per Capita Differences')
# Apply the custom thousands formatter to y-axis
plt.gca().yaxis.set_major_formatter(FuncFormatter(thousands_formatter))
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
Water Withdrawals 🌍💧¶
import matplotlib.pyplot as plt
import pandas as pd
import os
import seaborn as sns
# Prefer a local copy of the dataset; otherwise read it from the World Bank URL.
if os.path.isfile("water_withdrawal_by_sector.csv"):
    filepath = "water_withdrawal_by_sector.csv"
    print("Loading from file")
else:
    filepath = "https://datatopics.worldbank.org/sdgatlas/data/goal06/goal06.water_withdrawal_by_sector.csv"
    print("Loading from the internet")
water_usage_data = pd.read_csv(filepath)

# NOTE(review): income_categories is defined but not used by the filter below.
income_categories = ["HIC", "LIC", "LMC", "UMC"]
sectors = ['agriculture', 'domestic', 'industry']

# Geographic aggregates (and the world total) that must not appear on the
# x-axis; whatever rows remain are what gets plotted.
regions_to_remove = ['East Asia & Pacific', 'Europe & Central Asia', 'Latin America & Caribbean',
                     'Middle East & North Africa', 'North America', 'South Asia',
                     'Sub-Saharan Africa', 'World']

# Drop those rows.
filtered_data = water_usage_data[~water_usage_data['region_name'].isin(regions_to_remove)]

# Long format for seaborn: one row per (region, sector) with the share in 'value'.
melted_data = pd.melt(filtered_data, id_vars=['region_name', 'region'], value_vars=sectors, var_name='sector')

# Set the style
sns.set(style="whitegrid")

# Grouped bar chart: one group per remaining row, one bar per sector.
plt.figure(figsize=(10, 6))
sns.barplot(data=melted_data, x='region_name', y='value', hue='sector', palette='Set2')
plt.title('Water Withdrawal by Sectors for Different Income Categories')
plt.ylabel('Percentage')
# The x-axis shows the groups left after removing the geographic regions,
# which the title calls income categories — not individual countries.
plt.xlabel('Income category')
plt.xticks(rotation=45)
plt.legend(title='Sector')
plt.tight_layout()
plt.show()
Loading from file
import matplotlib.pyplot as plt
import pandas as pd
import os
import seaborn as sns
# Prefer a local copy of the dataset; otherwise read it from the World Bank URL.
if os.path.isfile("water_withdrawal_by_sector.csv"):
    filepath = "water_withdrawal_by_sector.csv"
    print("Loading from file")
else:
    filepath = "https://datatopics.worldbank.org/sdgatlas/data/goal06/goal06.water_withdrawal_by_sector.csv"
    print("Loading from the internet")
percent_data = pd.read_csv(filepath)

# Exclude the income-category rows, identified by their codes in 'region'.
filtered_data = percent_data[~percent_data['region'].isin(['HIC', 'LIC', 'LMC', 'UMC'])]

# Set the style
sns.set(style="whitegrid")

# One line per sector across the regions, each with a translucent fill down
# to zero. The fills overlap; they are not stacked.
sectors = ['agriculture', 'domestic', 'industry']
# A single colour per sector, used for both its line and its fill, so the
# legend colour of a sector always matches its shaded area.
sector_colors = {'agriculture': 'blue', 'domestic': 'green', 'industry': 'orange'}

plt.figure(figsize=(10, 6))
for sector in sectors:
    sns.lineplot(data=filtered_data, x='region_name', y=sector, label=sector,
                 color=sector_colors[sector])
for sector in sectors:
    plt.fill_between(filtered_data['region_name'], filtered_data[sector],
                     color=sector_colors[sector], alpha=0.3)

plt.title('Water Withdrawal by Sectors')
plt.ylabel('Percentage')
plt.xlabel('Region')
plt.xticks(rotation=45)
plt.legend(title='Sector')
plt.tight_layout()
plt.show()
Loading from file
Water Stress ⚠️🌎¶
Water stress is calculated by dividing total freshwater withdrawals by the available renewable freshwater. As a country's water usage approaches 100 percent, the pressure on its water resources intensifies. In some cases, the value of 100 percent can even be exceeded. This happens when a country consumes water resources that are non-renewable. While low water stress indicates sustainable usage, it is important to note that this is not necessarily a positive outcome: it can sometimes highlight a nation's struggle to effectively govern its water resources for the benefit of its population. In some low-income countries, water stress levels are very low, but access levels are also low.
import geopandas as gpd
import plotly.express as px
import pandas as pd
# Country-level water stress table and the country outlines from the shapefile.
water_stress_data = pd.read_csv('country_level_water_stress.csv')
shapefile_path = 'ne_110m_admin_0_countries/ne_110m_admin_0_countries.shp'
world = gpd.read_file(shapefile_path)

# Shapefile column matched against the 'iso' column of the water stress table.
correct_iso_column_name = 'ADM0_A3'

# Left join: every shape is kept, including those without a water stress row.
merged_data = world.merge(
    water_stress_data,
    how='left',
    left_on=correct_iso_column_name,
    right_on='iso',
)

# Options for the interactive choropleth, collected in one place.
choropleth_options = {
    'geojson': merged_data.geometry,
    'locations': merged_data.index,
    'color': 'water_stress',
    'color_continuous_scale': 'OrRd',
    'hover_name': 'ADMIN',  # country name shown on hover
    'hover_data': {'water_stress': ':.2f'},
    'title': 'Water Stress in Countries',
    'labels': {'water_stress': 'Water Stress (%)'},
    # NOTE(review): a midpoint equal to the top of range_color probably has no
    # effect once the range is fixed explicitly — confirm and consider removing.
    'color_continuous_midpoint': 100,
    'range_color': (0, 100),  # colour scale spans 0 to 100
}
fig = px.choropleth(merged_data, **choropleth_options)

# Zoom to the plotted shapes, hide the base map, and set the figure height.
fig.update_geos(visible=False, fitbounds="locations")
fig.update_layout(height=600)
fig.show()
If we focus on the Middle East and North Africa region, you'll notice that countries there are dealing with critical or high water stress. Why? Well, it's because water resources are pretty scarce in that area. The population of these countries has nearly quadrupled over the past 50 years and is projected to keep increasing. The growing demand for water puts pressure on the Nile River Basin, where water flows are already fully used to meet various domestic, agricultural and industrial demands.
import pandas as pd
import matplotlib.pyplot as plt
# Read the per-capita dataset and choose the countries to compare.
data = pd.read_csv("Per_capita2.csv")
countries_to_plot = ['Libya', 'Egypt, Arab Rep.', 'Algeria', 'Saudi Arabia']  # Change to the desired countries

fig, ax = plt.subplots(figsize=(14, 8))

# Row mask for the two comparison years; it is the same for every country.
in_comparison_years = data['year'].isin([1965, 2020])

# One group of vertical bars per country, one bar per comparison year.
for country in countries_to_plot:
    country_data = data[(data['country'] == country) & in_comparison_years]

    # Each bar is labelled "<year> - <country>".
    labels = [f'{year} - {country}' for year in country_data['year'].tolist()]

    # Bar heights are the population expressed in millions.
    ax.bar(labels, country_data['population'] / 1e6, alpha=0.5, label=f'{country} Population')

# Axis labels, title, grid and legend.
ax.set_xlabel('Years - Countries')
ax.set_ylabel('Population (Millions)')
ax.set_title('Population Distribution by Country and Year')
ax.grid(True)
ax.legend()
fig.tight_layout()
plt.show()
Looking Forward ↗️🔱¶
Water is a precious resource. Amid pressures from human activities and uncertainties linked to climate change, robust water management will be critical to the sustainable use of freshwater resources. This is especially important when considering the interdependencies that exist between water, food production, energy, ecosystems, and economic activities. Thus, let's take the first step towards a water-efficient future by integrating easy water-saving habits into our daily lives. Thank you. 💧🌿